knitr::opts_chunk$set(echo = TRUE)
input_path <- params$input_file
lower_threshold <- params$thr_min 
upper_threshold <- params$thr_max
basal_Ucell_threshold <- params$basal_thr
classical_Ucell_threshold <- params$classical_thr
sample_name <- params$sample_name
working_dir <- params$working_dir
input <- file.path(input_path,sample_name)
filtered_matrix <- paste0(sample_name,'_filtered.csv')
cont_table_sctype <-  paste0(sample_name,'_table_sctype.csv')
cont_table_seurat <-  paste0(sample_name,'_table_seurat.csv')
annotated_coordinates <- paste0(sample_name,'_annotation.csv')
preprocessing_figures <- paste0(sample_name,'_preprocessing_figures.pdf')
library(Seurat)
## Le chargement a nécessité le package : SeuratObject
## Le chargement a nécessité le package : sp
## 'SeuratObject' was built under R 4.4.1 but the current version is
## 4.4.2; it is recomended that you reinstall 'SeuratObject' as the ABI
## for R may have changed
## 
## Attachement du package : 'SeuratObject'
## Les objets suivants sont masqués depuis 'package:base':
## 
##     intersect, t
library(SeuratData)
library(ggplot2)
library(patchwork)
library(dplyr)
## 
## Attachement du package : 'dplyr'
## Les objets suivants sont masqués depuis 'package:stats':
## 
##     filter, lag
## Les objets suivants sont masqués depuis 'package:base':
## 
##     intersect, setdiff, setequal, union
library(glmGamPoi)
## 
## Attachement du package : 'glmGamPoi'
## L'objet suivant est masqué depuis 'package:dplyr':
## 
##     vars
## L'objet suivant est masqué depuis 'package:ggplot2':
## 
##     vars
library(UCell)
library(HGNChelper)
## Please cite our software :) 
##  
##  Sehyun Oh et al. HGNChelper: identification and correction of invalid gene symbols for human and mouse. F1000Research 2020, 9:1493. DOI: https://doi.org/10.12688/f1000research.28033.1 
##  
##  Type `citation('HGNChelper')` for a BibTeX entry.
source("https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/R/gene_sets_prepare.R")
source("https://raw.githubusercontent.com/kris-nader/sp-type/main/sp-type.R")
source("https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/R/sctype_score_.R")
library(ggplot2)
path <- paste0(input, '/outs/') 
if(!file.exists(path)){
  path <- paste0(input, '/outs_old/') 
}

if(file.exists(paste0(path, 'spatial/tissue_positions_list.csv'))){
  file.remove(paste0(path, 'spatial/tissue_positions_list.csv'))
}

data <- Load10X_Spatial(data.dir = path, filename= 'filtered_feature_bc_matrix.h5')

Filtering

#remove non expressed gene
data@assays$Spatial$counts <- data@assays$Spatial$counts[rowSums(data@assays$Spatial$counts)>0,]
## Warning: Different features in new layer data than already exists for counts
plot1 <- VlnPlot(data, features = "nCount_Spatial", assay = "Spatial", pt.size = 0.1) + NoLegend()
## Warning: Default search for "data" layer in "Spatial" assay yielded no results;
## utilizing "counts" layer instead.
## Warning: The `slot` argument of `FetchData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `PackageCheck()` was deprecated in SeuratObject 5.0.0.
## ℹ Please use `rlang::check_installed()` instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
plot2 <- SpatialFeaturePlot(data, features = "nCount_Spatial", pt.size.factor = 3) + theme(legend.position = "right")
wrap_plots(plot1, plot2)

outlier_spot <- rownames(data@meta.data[data@meta.data$nCount_Spatial>upper_threshold | data@meta.data$nCount_Spatial<lower_threshold,])

SpatialDimPlot(data, cells.highlight = outlier_spot, facet.highlight = TRUE, ncol = 3, pt.size.factor = 3)

filtered_data <- subset(data, nCount_Spatial > lower_threshold & nCount_Spatial < upper_threshold)
## Warning: Not validating Centroids objects
## Not validating Centroids objects
## Warning: Not validating FOV objects
## Not validating FOV objects
## Not validating FOV objects
## Not validating FOV objects
## Not validating FOV objects
## Not validating FOV objects
## Warning: Not validating Seurat objects
plot1 <- VlnPlot(filtered_data, features = "nCount_Spatial", assay = "Spatial", pt.size = 0.1) + NoLegend()
## Warning: Default search for "data" layer in "Spatial" assay yielded no results;
## utilizing "counts" layer instead.
plot2 <- SpatialFeaturePlot(filtered_data, features = "nCount_Spatial", pt.size.factor = 3) + theme(legend.position = "right")
wrap_plots(plot1, plot2)

filtered_count <- filtered_data@assays$Spatial$counts
write.csv(t(filtered_count), file = file.path(working_dir, filtered_matrix), row.names = TRUE)

Normalization

options(future.globals.maxSize = 8000 * 1024^2)
normalized_data <- SCTransform(filtered_data, assay = "Spatial", verbose = FALSE)
## Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `slot` argument of `SetAssayData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
normalized_data <- RunPCA(normalized_data, assay = "SCT", verbose = FALSE)
normalized_data <- FindNeighbors(normalized_data, reduction = "pca", dims = 1:30, verbose = FALSE)
normalized_data <- FindClusters(normalized_data, verbose = FALSE)
normalized_data <- RunUMAP(normalized_data, reduction = "pca", dims = 1:30, verbose = FALSE)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
plot(density(unlist(normalized_data@assays$SCT@scale.data)), lwd = 2)

SpatialFeaturePlot(normalized_data, features = c("TFF1", "SPRR3"), pt.size.factor = 3)

Annotation

loc <- read.csv(paste0(path,  '/spatial/tissue_positions.csv'))[,c('barcode', 'pxl_col_in_fullres', 'pxl_row_in_fullres')]
row.names(loc) <- loc$barcode
loc <- loc[,-1]
colnames(loc) <- c('x', 'y')

#removing of spots from loc but absent from count matrix 
row_to_remove <- setdiff(rownames(loc), colnames(filtered_count))
loc <- loc[!(rownames(loc) %in% row_to_remove),]
loc <- loc[order(rownames(loc)), ]

Ucell

gene.sets <- list() 
gene.sets$classical <- c("TFF1","TFF2","TFF3","CEACAM6", "LGALS4", "ST6GALNAC1", "PLA2G10","TSPAN8","LYZ","MYO1A", "VSIG2", "CLRN3", "CDH17", "AGR3", "AGR2", "BTNL8", "ANXA10", "FAM3D", "CTSE", "REG4")
gene.sets$basal <- c("SERPINB3", "SPRR3","SERPINB4", "VGLL1","DHRS9", "SPRR1B", "KRT17", "KRT15", "TNS4", "SCEL", "KRT6A", "KRT7", "CST6", "LY6D", "FAM83A", "AREG", "FGFBP1", "GPR87", "LEMD1","S100A2","SLC2A1")

ucell_obj <- AddModuleScore_UCell(normalized_data, features = gene.sets, slot = 'data')
signature.names <- paste0(names(gene.sets), "_UCell")
map1 <- SpatialFeaturePlot(ucell_obj, features = signature.names[1], pt.size.factor = 3) +
  scale_fill_gradientn(limits = c(0, 0.8), colors = c('blue',  'yellow', 'red'))
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
map2 <- SpatialFeaturePlot(ucell_obj, features = signature.names[2], pt.size.factor = 3) +
  scale_fill_gradientn(limits = c(0, 0.8), colors = c('blue',  'yellow', 'red'))
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
uscore_plot <- wrap_plots(map1, map2)
print(uscore_plot)

clrn3_pos <- colnames(normalized_data@assays$Spatial["counts"])[normalized_data@assays$Spatial["counts"]["CLRN3", ] > 1]
dhrs9_pos <- colnames(normalized_data@assays$Spatial["counts"])[normalized_data@assays$Spatial["counts"]["DHRS9", ] > 13]
tns4_pos <- colnames(normalized_data@assays$Spatial["counts"])[normalized_data@assays$Spatial["counts"]["TNS4", ] > 7]
#obj from classical_basal_identification.Rmd
classical <- names(ucell_obj$classical_UCell[ucell_obj$classical_UCell > classical_Ucell_threshold])
basal <- names(ucell_obj$basal_UCell[ucell_obj$basal_UCell > basal_Ucell_threshold])

loc$'uscore label' <- 'other' #Initialization
if(length(classical)>0){
  loc[rownames(loc) %in% classical,]$'uscore label' <- 'classical'}
if(length(basal)>0){
  loc[rownames(loc) %in% basal,]$'uscore label' <- 'basal'}
if(length(intersect(classical, basal))>0){
  loc[rownames(loc) %in% intersect(classical, basal),]$'uscore label' <- 'classical and basal'}

ucell_obj <- AddMetaData(ucell_obj, metadata = loc)
loc$'uscore label' <- 'other' #Initialization
if(length(classical)>0){
  loc[rownames(loc) %in% krt6a_pos,]$'uscore label' <- 'KRT6A'}
if(length(basal)>0){
  loc[rownames(loc) %in% fam83a_pos,]$'uscore label' <- 'FAM83A'}
if(length(intersect(classical, basal))>0){
  loc[rownames(loc) %in% intersect(krt6a_pos, fam83a_pos),]$'uscore label' <- 'KRT6A and FAM83A'}
classical_basal_plot <- SpatialPlot(ucell_obj, group.by = 'uscore label', pt.size.factor = 3, image.alpha = 0.4)+
  theme(legend.text = element_text(size=14), legend.title = element_text(size=14))+
  scale_fill_manual(values = c('basal'='red', 'classical'='yellow', 'classical and basal' = '#ff8503', 'other' = 'blue'))
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
print(classical_basal_plot)

sctype

#sctype_result <- run_sctype(normalized_data, known_tissue_type="Pancreas", slot="SCT")
#loc$'sctype label' <- sctype_result$sctype_classification[rownames(loc)]
# DB file
db_ <- "https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/ScTypeDB_full.xlsx";
tissue <- "Pancreas" # e.g. Immune system,Pancreas,Liver,Eye,Kidney,Brain,Lung,Adrenal,Heart,Intestine,Muscle,Placenta,Spleen,Stomach,Thymus 

# prepare gene sets
gs_list <- gene_sets_prepare(db_, tissue)
## Warning in checkGeneSymbols(markers_all): Human gene symbols should be all
## upper-case except for the 'orf' in open reading frames. The case of some
## letters was corrected.
## Warning in checkGeneSymbols(markers_all): x contains non-approved gene symbols
sctype_scores <- sctype_score(scRNAseqData = as.matrix(normalized_data@assays$SCT@scale.data), scaled = TRUE, gs = gs_list$gs_positive, gs2 = gs_list$gs_negative)

score_table <- as.data.frame(t(sctype_scores))
score_table$sctype_classification <- apply(score_table, 1, function(x) names(x)[which.max(x)])


sctype_result <- normalized_data
sctype_result@meta.data$sctype_classification <- score_table$sctype_classification
loc$'sctype label' <- sctype_result$sctype_classification[rownames(loc)]
library(RColorBrewer)
types_classif <- sort(unique(sctype_result@meta.data$sctype_classification), decreasing = TRUE)
n_classif <- length(types_classif)
base_palette <- colorRampPalette(brewer.pal(10, "Set3"))
colors_classif <- setNames(base_palette(n_classif), types_classif)

sctype_plot <- SpatialDimPlot(sctype_result, group.by="sctype_classification", pt.size.factor = 3, image.alpha = 0.4)+
  scale_fill_manual(values = colors_classif)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
print(sctype_plot)

Ucell + sctype

sctype_barplot <- ggplot(loc, aes(fill=`sctype label`, x = `uscore label`)) +
  geom_bar(position = "stack") +
  scale_fill_manual(values = colors_classif)+
  labs(x='Uscore labels', y='Number of Spots', fill='ScType annotation')+
  theme(
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12) 
  )

print(sctype_barplot)

contingency_sctype_table <- table(loc$`uscore label`, loc$`sctype label`)
contingency_sctype_table_with_totals <- addmargins(contingency_sctype_table)
write.csv(contingency_sctype_table_with_totals, file = file.path(working_dir, cont_table_sctype), row.names = TRUE)
sctype_ucell_plot <- ggplot(loc, aes(x = x, y = y)) +
  geom_point(aes(fill = `sctype label`, color = `uscore label`), shape = 21, size = 1.4, stroke = 0.6) +
  scale_color_manual(values = c('basal'='red', 'classical'='#FFDB58', 'classical and basal' = '#ff8503', 'other' = 'blue')) +
  theme_void() +                          
  coord_fixed() +   
  scale_y_reverse() +
  scale_shape_manual(values = 1:10) +      
  theme(legend.position = "right")

print(sctype_ucell_plot)

loc$'sctype + uscore label' <- loc$'sctype label'
sctype_result@meta.data$'sctype_uscore' <- sctype_result@meta.data$'sctype_classification'

if(length(classical)>0){
  loc[rownames(loc) %in% classical & loc$`sctype label` == "Ductal cells",]$'sctype + uscore label' <- 'classical'
  sctype_result@meta.data[rownames(loc) %in% classical & loc$`sctype label` == "Ductal cells",]$'sctype_uscore' <- 'Classical'
}
if(length(basal)>0){
  loc[rownames(loc) %in% basal & loc$`sctype label` == "Ductal cells",]$'sctype + uscore label' <- 'basal'
  sctype_result@meta.data[rownames(loc) %in% basal & loc$`sctype label` == "Ductal cells",]$'sctype_uscore' <- 'Basal'
  }
if(length(intersect(classical, basal))>0){
  loc[rownames(loc) %in% intersect(classical, basal) & loc$`sctype label` == "Ductal cells",]$'sctype + uscore label' <- 'classical and basal'
  sctype_result@meta.data[rownames(loc) %in% intersect(classical, basal) & loc$`sctype label` == "Ductal cells",]$'sctype_uscore' <- 'Classical and Basal'
  }
types_uscore <- unique(sctype_result@meta.data$sctype_uscore)

manual_types <- c("Classical", "Basal", "Classical and Basal") 
manual_colors <- c("Classical" = "yellow",  
                   "Basal" = "red", 
                   "Classical and Basal" = "#ff8503")

new_types <- setdiff(manual_types, types_classif)
final_colors <- c(colors_classif, manual_colors[new_types])
final_colors <- final_colors[types_uscore]

sctype_ucell_plot2 <- SpatialDimPlot(sctype_result, group.by="sctype_uscore", pt.size.factor = 3, image.alpha = 0.4)+
  scale_fill_manual(values = final_colors, name ='ScType & Uscore annotation' ) 
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
print(sctype_ucell_plot2)

Annotation with Peng data

#peng_sc <- readRDS("../../../datashare/genref_hadaca3/00_peng_k_2019.rds")
purist_peng_sc <- readRDS("../../../datashare/genref_hadaca3/peng_sub.rds")
sc_ref[["percent.mt"]] <- PercentageFeatureSet(sc_ref, pattern = "^MT-")
sc_temp <- sc_ref
Idents(sc_temp) <- "all_cells"
VlnPlot(sc_temp, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
plot1 <- FeatureScatter(sc_temp, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(sc_temp, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot1 + plot2
VlnPlot(sc_ref_normalized, features = c('KRT19', 'PRSS1'), group.by ='cell_type')
data_normalization <- function(data){
  normalized_data <- SCTransform(data, ncells = 5000, verbose = FALSE) %>%
      RunPCA(verbose = FALSE) %>%
      #FindNeighbors(dims = 1:30, verbose = FALSE) %>%
      #FindClusters(verbose = FALSE) %>%
      RunUMAP(dims = 1:30, verbose = FALSE)
  return(normalized_data)}

single_cell_integration <- function(normalized_ref_data, normalized_query_data, label_column, new_colname){
  anchors <- FindTransferAnchors(reference = normalized_ref_data, 
                                 query = normalized_query_data, 
                                 #recompute
                                 dims = 1:30, 
                                 reference.reduction = "pca",
                                 normalization.method = "SCT",
                                 project.query = FALSE)

  predictions <- TransferData(anchorset = anchors, refdata=normalized_ref_data@meta.data[[label_column]], weight.reduction = normalized_query_data[['pca']], dims = 1:30)
  
  normalized_query_data <- AddMetaData(normalized_query_data, metadata = predictions)
  
  names(normalized_query_data@meta.data)[names(normalized_query_data@meta.data) == "predicted.id"] <- new_colname
  
  return(normalized_query_data)
}
#normalized_peng_data <- data_normalization(peng_sc)
#normalized_data <- single_cell_integration(normalized_peng_data, normalized_data, 'cell_type', 'peng_original_data')

normalized_purist_peng_data <- data_normalization(purist_peng_sc)
#normalized_data <- single_cell_integration(normalized_purist_peng_data, normalized_data, 'cell_type', 'peng_subset')
normalized_data <- single_cell_integration(normalized_purist_peng_data, normalized_data, 'hadaca3', 'purist_peng')
#peng_original_plot <- SpatialDimPlot(normalized_data, group.by="peng_original_data", pt.size.factor = 3, image.alpha = 0.4)
#peng_subset_plot <- SpatialDimPlot(normalized_data, group.by="peng_subset", pt.size.factor = 3, image.alpha = 0.4)
purist_peng_plot <- SpatialDimPlot(normalized_data, group.by="purist_peng", pt.size.factor = 3, image.alpha = 0.4)
#peng_annot_plots <- wrap_plots(peng_original_plot, peng_subset_plot)
print(purist_peng_plot)
#loc$'peng original' <-  normalized_data$peng_original_data[rownames(loc)]
loc$'purist peng' <-  normalized_data$purist_peng[rownames(loc)]
peng_barplot2 <- ggplot(loc, aes(fill=`purist peng`, x = `uscore label`)) +
         geom_bar(position = "stack")
print(peng_barplot2)
contingency_seurat_table <- table(loc$`uscore label`, loc$`purist peng`)
contingency_seurat_table_with_totals <- addmargins(contingency_seurat_table)
write.csv(contingency_seurat_table_with_totals, file = file.path(working_dir, cont_table_seurat), row.names = TRUE)
write.csv(loc, file = file.path(working_dir, annotated_coordinates), row.names = TRUE)
pdf(file.path(working_dir, preprocessing_figures))
print(classical_basal_plot)
print(sctype_plot)
print(sctype_barplot)
print(sctype_ucell_plot)
print(sctype_ucell_plot2)
#print(peng_original_plot)
#print(peng_subset_plot)
#print(purist_peng_plot)
#print(peng_barplot)
#print(peng_barplot2)
dev.off()
## quartz_off_screen 
##                 2